library(tidyverse)
## ── Attaching packages ──────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ─────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the cleaned survey records.
# The column types are declared explicitly (they match what readr would
# otherwise guess for this file) so the schema cannot silently change if the
# first rows of the CSV change, and no parse message is printed.
surveys_complete <- read_csv(
  "data/surveys_complete.csv",
  col_types = cols(
    record_id = col_double(),
    month = col_double(),
    day = col_double(),
    year = col_double(),
    plot_id = col_double(),
    species_id = col_character(),
    sex = col_character(),
    hindfoot_length = col_double(),
    weight = col_double(),
    genus = col_character(),
    species = col_character(),
    taxa = col_character(),
    plot_type = col_character()
  )
)
# Print the tibble to inspect its dimensions and first rows.
surveys_complete
## # A tibble: 30,463 x 13
## record_id month day year plot_id species_id sex hindfoot_length weight
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl>
## 1 845 5 6 1978 2 NL M 32 204
## 2 1164 8 5 1978 2 NL M 34 199
## 3 1261 9 4 1978 2 NL M 32 197
## 4 1756 4 29 1979 2 NL M 33 166
## 5 1818 5 30 1979 2 NL M 32 184
## 6 1882 7 4 1979 2 NL M 32 206
## 7 2133 10 25 1979 2 NL F 33 274
## 8 2184 11 17 1979 2 NL F 30 186
## 9 2406 1 16 1980 2 NL F 33 184
## 10 3000 5 18 1980 2 NL F 31 87
## # … with 30,453 more rows, and 4 more variables: genus <chr>, species <chr>,
## # taxa <chr>, plot_type <chr>
# Bind the plot to the survey data only (no mapping, no layers yet).
ggplot(surveys_complete)

# Add the aesthetic mapping: weight on x, hindfoot length on y.
ggplot(surveys_complete, aes(x = weight, y = hindfoot_length))

# Add a point layer to draw the scatter plot.
ggplot(surveys_complete, mapping = aes(weight, hindfoot_length)) +
  geom_point()

# Store the data + mapping as a reusable base plot
surveys_plot <- ggplot(
  surveys_complete,
  aes(x = weight, y = hindfoot_length)
)
# Render the base plot with a point layer
surveys_plot +
  geom_point()

Challenge 1 (optional)
# Install hexbin only when it is not already available, so re-running the
# script does not reinstall the package (and hit the network) every time.
if (!requireNamespace("hexbin", quietly = TRUE)) {
  install.packages("hexbin")
}
library("hexbin")
# Hexagonal binning of the weight / hindfoot_length base plot.
surveys_plot +
  geom_hex()

# hexagonal strengths: areas with a higher concentration of counts are easier to see
# hexagonal weaknesses: values may not be read as accurately, since each hexagon is larger than a point; the smaller circles of a scatter plot give more precise information
Building your plots iteratively
# Step 1: plain scatter plot of hindfoot length against weight.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point()

# Step 2: add transparency to the points.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point(alpha = 0.1)

# Step 3: draw every point in a single fixed colour.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point(colour = "blue", alpha = 0.1)

# Step 4: colour the points by species instead of a fixed colour.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point(aes(colour = species_id), alpha = 0.1)

Challenge 2
# Weight per species, with points coloured by plot type.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_point(aes(colour = plot_type))

# this is a good way to show this type of data since you are able to clearly see the plot type for each species
Boxplot
# Distribution of weight within each species.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_boxplot()

# Same boxplot with a transparent fill, plus the jittered raw observations.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_boxplot(alpha = 0) +
  geom_jitter(colour = "tomato", alpha = 0.3)

Challenge 3
# Violin plot of weight per species.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_violin()

# Same violin plot with weight on a log10 scale.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_violin() +
  scale_y_log10()

# Hindfoot length per species: raw observations with a boxplot summary.
# The jitter layer is drawn first and made semi-transparent so it does not
# hide the boxes. The boxplot is drawn on top without fill, and its outlier
# points are suppressed because every observation is already shown by the
# jitter layer (otherwise each outlier would appear twice).
ggplot(data = surveys_complete, mapping = aes(x = species_id, y = hindfoot_length)) +
  geom_jitter(alpha = 0.3) +
  geom_boxplot(outlier.shape = NA, fill = NA)

# Same plot with the observations coloured blue.
ggplot(data = surveys_complete, mapping = aes(x = species_id, y = hindfoot_length)) +
  geom_jitter(alpha = 0.3, color = "blue") +
  geom_boxplot(outlier.shape = NA, fill = NA)

Plotting Time Series Data
# Number of records per year and genus (count column is named n).
yearly_counts <- count(surveys_complete, year, genus)

# One line through all year/genus rows (no grouping).
ggplot(yearly_counts, aes(year, n)) +
  geom_line()

# One line per genus.
ggplot(yearly_counts, aes(year, n, group = genus)) +
  geom_line()

# One coloured line per genus.
ggplot(yearly_counts, aes(year, n, colour = genus)) +
  geom_line()

Integrating the pipe operator with ggplot2
# Pipe the pre-computed counts straight into ggplot.
yearly_counts %>%
  ggplot(aes(year, n, colour = genus)) +
  geom_line()

# Count and plot in a single pipeline, keeping the plot object for reuse.
yearly_counts_graph <- surveys_complete %>%
  count(year, genus) %>%
  ggplot(aes(year, n, colour = genus)) +
  geom_line()
yearly_counts_graph

Faceting
# One panel per genus.
ggplot(yearly_counts, aes(year, n)) +
  geom_line() +
  facet_wrap(vars(genus))

# Number of records per year, genus and sex.
yearly_sex_counts <- count(surveys_complete, year, genus, sex)

# One panel per genus, one coloured line per sex.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(vars(genus))

# Grid of panels: sex in rows, genus in columns.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_grid(sex ~ genus)

# One column, facet by rows
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_grid(genus ~ .)

# One row, facet by column
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_grid(. ~ genus)

ggplot2 Themes
# Per-genus panels with the black-and-white theme applied.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  theme_bw()

# Average weight per year and species.
# .groups = "drop" returns an ungrouped tibble, so later verbs applied to
# yearly_weight are not silently evaluated per year, and summarise() no
# longer prints its regrouping message.
yearly_weight <- surveys_complete %>%
  group_by(year, species_id) %>%
  summarize(avg_weight = mean(weight), .groups = "drop")

# Average weight through time, one panel per species.
ggplot(data = yearly_weight, mapping = aes(x = year, y = avg_weight)) +
  geom_line() +
  facet_wrap(vars(species_id)) +
  theme_bw()

Customization
# Add a title and axis labels.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw()

# Set the base text size for the whole plot.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  labs(
    title = "Observations through time",
    x = "Years of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(text = element_text(size = 16))

# Rotate the x-axis labels, restyle both axes and italicise the facet strips.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Years of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 16),
    strip.text = element_text(face = "italic"),
    axis.text.x = element_text(
      size = 12, colour = "grey20",
      angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(size = 12, colour = "grey20")
  )

# Reusable theme tweaks: base text size 16, grey axis labels at size 12,
# x-axis labels rotated by 90 degrees.
grey_theme <- theme(
  text = element_text(size = 16),
  axis.text.x = element_text(
    size = 12, colour = "grey20",
    angle = 90, hjust = 0.5, vjust = 0.5
  ),
  axis.text.y = element_text(size = 12, colour = "grey20")
)

# Apply the saved theme to a hindfoot-length boxplot.
ggplot(surveys_complete, aes(species_id, hindfoot_length)) +
  geom_boxplot() +
  grey_theme

Challenge 4